# encoding=utf-8
import pandas as pd
import requests
from lxml import etree
import re
import collections
from sqlalchemy import create_engine
import time


def fund_code_name():
    """ 筛选天天基金，6千多基金机构的，最近一周收益率排在前50强基金并且存入数据库"""
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
        'Referer': 'http://fund.eastmoney.com/data/fundranking.html',
        'Cookie': 'st_si=51694067779834; st_asi=delete; ASP.NET_SessionId=e1pno0koqkcp5es3xyzyrg1n; EMFUND1=null; EMFUND2=null; EMFUND3=null; EMFUND4=null; EMFUND5=null; EMFUND6=null; EMFUND7=null; EMFUND8=null; EMFUND0=null; _adsame_fullscreen_18503=1; EMFUND9=08-16 01:16:38@#$%u4E07%u5BB6%u65B0%u5229%u7075%u6D3B%u914D%u7F6E%u6DF7%u5408@%23%24519191; st_pvi=87492384111747; st_sp=2020-08-16%2000%3A05%3A17; st_inirUrl=http%3A%2F%2Ffund.eastmoney.com%2Fdata%2Ffundranking.html; st_sn=15; st_psi=20200816011636912-0-9218336114'

    }
    engine = create_engine('mysql+pymysql://root:Zhou@1010@localhost/stock_db', encoding='utf-8')

    # begin_date = input("y-m-d")
    end_date = time.strftime("%Y-%m-%d")
    response = requests.get(
        url='http://fund.eastmoney.com/data/rankhandler.aspx?op=ph&dt=kf&ft=all&rs=&gs=0&sc=zzf&st=desc&sd={}&'
            'ed={}&qdii=&tabSubtype=,,,,,&pi=1&pn=50&dx=1&v=0.12623255916277043'.format(end_date, end_date),
        headers=header)
    text = response.text
    # text 返回抓包
    data = text.split('=')[1]
    # print(data)得到全部数据
    compile_data = re.findall("{datas:\\[(.*)\\],allRecords", str(data))[0]
    strip_data = str(compile_data).strip('[').strip(']')
    replace_quta = strip_data.replace('"', "")
    quota_arrays = replace_quta.split(",")
    intervals = [[i * 25, (i + 1) * 25] for i in range(258)]
    # print(intervals)划分
    narrays = []
    for k in intervals:
        start, end = k[0], k[1]
        line = quota_arrays[start:end]
        narrays.append(line)
    # print(narrays)
    header = ["fund_code", "fund_name", "基金条码", "update_time",
              "net_unit_value", "cumulative_net_value", "day_growth", "week", "month", "three_months", "six_months",
              "year", "two_years", "three_years",
              "this_year", "establish", "其他1", "其他2", "其他3", "其他4", "buy_fee_rate", "其他6", "其他7", "其他8", "其他9"]
    df = pd.DataFrame(narrays, columns=header)
    df_part = df[["fund_code", "fund_name", "update_time",
                  "net_unit_value", "cumulative_net_value", "day_growth", "week", "month", "three_months", "six_months",
                  "year", "two_years",
                  "three_years", "this_year", "establish", "buy_fee_rate"]]
    df_tmp = df_part.sort_values(by=["week"], ascending=False, axis=0)
    rank_fund_code = df_tmp.head(50)["fund_code"]
    fund_codes_list = rank_fund_code.values.tolist()
    pd.io.sql.to_sql(df_tmp.head(50), 'management_fund', con=engine, if_exists='append', index=False)

    # print(df_tmp.head(50))
    # print("前50强基金：", fund_codes_list)
    # df_tmp.head(50).to_csv("./本季度前50强基金收益.csv", encoding="utf_8_sig")
    return fund_codes_list


def get_one_fund_stocks(fund_code):
    """根据基金码,获取每一支基金的最新一季度所有持仓股票池前10支股票"""
    url = "http://fundf10.eastmoney.com/FundArchivesDatas.aspx?type=jjcc&code={}&topline=10&year=&month=&rt=0.5032668912422176".format(
        fund_code)
    head = {
        "Cookie": "EMFUND1=null; EMFUND2=null; EMFUND3=null; EMFUND4=null; EMFUND5=null; EMFUND6=null; EMFUND7=null;"
                  " EMFUND8=null; EMFUND0=null; st_si=44023331838789; st_asi=delete; EMFUND9=08-16 "
                  "22:04:25@#$%u4E07%u5BB6%u65B0%u5229%u7075%u6D3B%u914D%u7F6E%u6DF7%u5408@%23%24519191;"
                  " ASP.NET_SessionId=45qdofapdlm1hlgxapxuxhe1; st_pvi=87492384111747; st_sp=2020-08-16%2000%3A05%3A17;"
                  " st_inirUrl=http%3A%2F%2Ffund.eastmoney.com%2Fdata%2Ffundranking.html; st_sn=12; "
                  "st_psi=2020081622103685-0-6169905557"
        ,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"}

    response = requests.get(url, headers=head)
    text = response.text  # html subsitue text
    div = re.findall('content:\\"(.*)\\",arryear', text)[0]
    html_body = '<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>test</title></head><body>%s</body></html>' % (
        div)
    html = etree.HTML(html_body)
    stock_info = html.xpath('//div[1]/div/table/tbody/tr/td/a')
    # stock_money = html.xpath('//div[1]/div/table/tbody/tr/td')
    stock_one_fund = []
    # 所有基金加其持有10股票得到列表的列表
    all_fund_stocks = []
    stock_one_fund.append(fund_code)
    for stock in stock_info:
        if stock.text and stock.text.isdigit():
            stock_one_fund.append(stock.text)

    # if len(stock_one_fund) > 1:
    #     # 列表第一行加入了基金的代码,所以第一个值不能取
    #
    #     print("基金代码：{}".format(fund_code), "基金持有前10股票池", stock_one_fund[1:])
    return stock_one_fund  # can return empty list


def static_best_stock(rank=20):
    """ 统计收益最佳前50机构共同持有股票代码情况,修改rank数量可调整展示股票排名数目"""
    rank_codes = fund_code_name()
    stocks_array = []
    f_s_list = []
    for index, code in enumerate(rank_codes):
        if index < 1:
            print("<" * 30 + "FBI WARNING近1周收益最高基金的排名高到低排序以及股票池情况" + ">" * 30)
        stocks_of_one_fund = get_one_fund_stocks(code)
        f_s_list.append(stocks_of_one_fund)
        stocks = get_one_fund_stocks(code)[1:]
        if len(stocks) > 1 and stocks:
            stocks_array.extend(stocks)
    # print(stocks_array)
    # print(f_s_list)
    data = pd.DataFrame(f_s_list,
                        columns=['fund_code', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine',
                                 'ten'])
    # print(data)
    engine = create_engine('mysql+pymysql://root:Zhou@1010@localhost/stock_db', encoding='utf-8')
    pd.io.sql.to_sql(data, 'management_stock', con=engine, if_exists='append', index=False)

    count_each_stock = collections.Counter(stocks_array)
    print("<" * 30 + "FBI WARNING,{}".format(static_best_stock.__doc__) + ">" * 30)
    print("#" * 30 + "本季度基金机构共同持有股票数目排行前{}股票代码情况".format(rank) + "#" * 30)
    df = pd.DataFrame.from_dict(count_each_stock, orient='index', columns=["hold"])
    df = df.reset_index().rename(columns={"index": "stock_code"})
    # for k, v in count_each_stock.items():
    #     print("股票代码: ", k, "持有该股票机构数量: ", v)
    df = df.sort_values(by="hold", ascending=False)
    # print(df.head(rank))
    pd.io.sql.to_sql(df.head(rank), 'management_rank_stock', con=engine, if_exists='append', index=False)


if __name__ == '__main__':
    static_best_stock()
